{
 "metadata": {
  "name": "",
  "signature": "sha256:28f034a531b1925724959079a55f964ded07e33205fc78a28e8e1109a5cb8032"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Example: 2012 Federal Election Commission Database"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import division\n",
      "\n",
      "# Imports. Later cells call pandas names unqualified (e.g. read_csv),\n",
      "# so the star import has to stay until those cells are migrated.\n",
      "from numpy.random import randn\n",
      "import numpy as np\n",
      "import os\n",
      "import matplotlib.pyplot as plt\n",
      "from pandas import *\n",
      "import pandas\n",
      "\n",
      "# Fixed seed and display settings.\n",
      "np.random.seed(12345)\n",
      "np.set_printoptions(precision=4)\n",
      "plt.rc('figure', figsize=(10, 6))\n",
      "\n",
      "# Enter the data directory only when it is reachable from the current\n",
      "# location: the path is relative, so an unconditional cd raises when this\n",
      "# cell is re-run after the directory has already been entered.\n",
      "FEC_DIR = os.path.join('book_scripts', 'fec')\n",
      "if os.path.isdir(FEC_DIR):\n",
      "    os.chdir(FEC_DIR)\n",
      "print(os.getcwd())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/home/phillip/Documents/code/py/pandas-book/rev_539000/book_scripts/fec\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Load the contribution records from the CSV in the working directory.\n",
      "fec = pandas.read_csv('P00000001-ALL.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Preview the loaded frame; the stored repr shows only the leading and trailing rows.\n",
      "fec"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>cmte_id</th>\n",
        "      <th>cand_id</th>\n",
        "      <th>cand_nm</th>\n",
        "      <th>contbr_nm</th>\n",
        "      <th>contbr_city</th>\n",
        "      <th>...</th>\n",
        "      <th>receipt_desc</th>\n",
        "      <th>memo_cd</th>\n",
        "      <th>memo_text</th>\n",
        "      <th>form_tp</th>\n",
        "      <th>file_num</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0      </th>\n",
        "      <td> C00410118</td>\n",
        "      <td> P20002978</td>\n",
        "      <td> Bachmann, Michelle</td>\n",
        "      <td>             HARVEY, WILLIAM</td>\n",
        "      <td>             MOBILE</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 736166</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1      </th>\n",
        "      <td> C00410118</td>\n",
        "      <td> P20002978</td>\n",
        "      <td> Bachmann, Michelle</td>\n",
        "      <td>             HARVEY, WILLIAM</td>\n",
        "      <td>             MOBILE</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 736166</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2      </th>\n",
        "      <td> C00410118</td>\n",
        "      <td> P20002978</td>\n",
        "      <td> Bachmann, Michelle</td>\n",
        "      <td>               SMITH, LANIER</td>\n",
        "      <td>             LANETT</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 749073</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3      </th>\n",
        "      <td> C00410118</td>\n",
        "      <td> P20002978</td>\n",
        "      <td> Bachmann, Michelle</td>\n",
        "      <td>            BLEVINS, DARONDA</td>\n",
        "      <td>            PIGGOTT</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 749073</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4      </th>\n",
        "      <td> C00410118</td>\n",
        "      <td> P20002978</td>\n",
        "      <td> Bachmann, Michelle</td>\n",
        "      <td>          WARDENBURG, HAROLD</td>\n",
        "      <td> HOT SPRINGS NATION</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 736166</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5      </th>\n",
        "      <td> C00410118</td>\n",
        "      <td> P20002978</td>\n",
        "      <td> Bachmann, Michelle</td>\n",
        "      <td>              BECKMAN, JAMES</td>\n",
        "      <td>         SPRINGDALE</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 736166</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6      </th>\n",
        "      <td> C00410118</td>\n",
        "      <td> P20002978</td>\n",
        "      <td> Bachmann, Michelle</td>\n",
        "      <td>            BLEVINS, DARONDA</td>\n",
        "      <td>            PIGGOTT</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 736166</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1001724</th>\n",
        "      <td> C00500587</td>\n",
        "      <td> P20003281</td>\n",
        "      <td>        Perry, Rick</td>\n",
        "      <td> HEFFERNAN, JILL PRINCE MRS.</td>\n",
        "      <td>     INFO REQUESTED</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 751678</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1001725</th>\n",
        "      <td> C00500587</td>\n",
        "      <td> P20003281</td>\n",
        "      <td>        Perry, Rick</td>\n",
        "      <td>            ELWOOD, MIKE MR.</td>\n",
        "      <td>     INFO REQUESTED</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 751678</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1001726</th>\n",
        "      <td> C00500587</td>\n",
        "      <td> P20003281</td>\n",
        "      <td>        Perry, Rick</td>\n",
        "      <td>        GORMAN, CHRIS D. MR.</td>\n",
        "      <td>     INFO REQUESTED</td>\n",
        "      <td>...</td>\n",
        "      <td> REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM...</td>\n",
        "      <td> NaN</td>\n",
        "      <td> REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM...</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 751678</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1001727</th>\n",
        "      <td> C00500587</td>\n",
        "      <td> P20003281</td>\n",
        "      <td>        Perry, Rick</td>\n",
        "      <td>         DUFFY, DAVID A. MR.</td>\n",
        "      <td>     INFO REQUESTED</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 751678</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1001728</th>\n",
        "      <td> C00500587</td>\n",
        "      <td> P20003281</td>\n",
        "      <td>        Perry, Rick</td>\n",
        "      <td>         GRANE, BRYAN F. MR.</td>\n",
        "      <td>     INFO REQUESTED</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 751678</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1001729</th>\n",
        "      <td> C00500587</td>\n",
        "      <td> P20003281</td>\n",
        "      <td>        Perry, Rick</td>\n",
        "      <td>          TOLBERT, DARYL MR.</td>\n",
        "      <td>     INFO REQUESTED</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 751678</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1001730</th>\n",
        "      <td> C00500587</td>\n",
        "      <td> P20003281</td>\n",
        "      <td>        Perry, Rick</td>\n",
        "      <td>      ANDERSON, MARILEE MRS.</td>\n",
        "      <td>     INFO REQUESTED</td>\n",
        "      <td>...</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> NaN</td>\n",
        "      <td>                                               NaN</td>\n",
        "      <td> SA17A</td>\n",
        "      <td> 751678</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>1001731 rows \u00d7 16 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 6,
       "text": [
        "           cmte_id    cand_id             cand_nm  \\\n",
        "0        C00410118  P20002978  Bachmann, Michelle   \n",
        "1        C00410118  P20002978  Bachmann, Michelle   \n",
        "2        C00410118  P20002978  Bachmann, Michelle   \n",
        "3        C00410118  P20002978  Bachmann, Michelle   \n",
        "4        C00410118  P20002978  Bachmann, Michelle   \n",
        "5        C00410118  P20002978  Bachmann, Michelle   \n",
        "6        C00410118  P20002978  Bachmann, Michelle   \n",
        "...            ...        ...                 ...   \n",
        "1001724  C00500587  P20003281         Perry, Rick   \n",
        "1001725  C00500587  P20003281         Perry, Rick   \n",
        "1001726  C00500587  P20003281         Perry, Rick   \n",
        "1001727  C00500587  P20003281         Perry, Rick   \n",
        "1001728  C00500587  P20003281         Perry, Rick   \n",
        "1001729  C00500587  P20003281         Perry, Rick   \n",
        "1001730  C00500587  P20003281         Perry, Rick   \n",
        "\n",
        "                           contbr_nm         contbr_city         ...          \\\n",
        "0                    HARVEY, WILLIAM              MOBILE         ...           \n",
        "1                    HARVEY, WILLIAM              MOBILE         ...           \n",
        "2                      SMITH, LANIER              LANETT         ...           \n",
        "3                   BLEVINS, DARONDA             PIGGOTT         ...           \n",
        "4                 WARDENBURG, HAROLD  HOT SPRINGS NATION         ...           \n",
        "5                     BECKMAN, JAMES          SPRINGDALE         ...           \n",
        "6                   BLEVINS, DARONDA             PIGGOTT         ...           \n",
        "...                              ...                 ...         ...           \n",
        "1001724  HEFFERNAN, JILL PRINCE MRS.      INFO REQUESTED         ...           \n",
        "1001725             ELWOOD, MIKE MR.      INFO REQUESTED         ...           \n",
        "1001726         GORMAN, CHRIS D. MR.      INFO REQUESTED         ...           \n",
        "1001727          DUFFY, DAVID A. MR.      INFO REQUESTED         ...           \n",
        "1001728          GRANE, BRYAN F. MR.      INFO REQUESTED         ...           \n",
        "1001729           TOLBERT, DARYL MR.      INFO REQUESTED         ...           \n",
        "1001730       ANDERSON, MARILEE MRS.      INFO REQUESTED         ...           \n",
        "\n",
        "                                              receipt_desc memo_cd  \\\n",
        "0                                                      NaN     NaN   \n",
        "1                                                      NaN     NaN   \n",
        "2                                                      NaN     NaN   \n",
        "3                                                      NaN     NaN   \n",
        "4                                                      NaN     NaN   \n",
        "5                                                      NaN     NaN   \n",
        "6                                                      NaN     NaN   \n",
        "...                                                    ...     ...   \n",
        "1001724                                                NaN     NaN   \n",
        "1001725                                                NaN     NaN   \n",
        "1001726  REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM...     NaN   \n",
        "1001727                                                NaN     NaN   \n",
        "1001728                                                NaN     NaN   \n",
        "1001729                                                NaN     NaN   \n",
        "1001730                                                NaN     NaN   \n",
        "\n",
        "                                                 memo_text form_tp  file_num  \n",
        "0                                                      NaN   SA17A    736166  \n",
        "1                                                      NaN   SA17A    736166  \n",
        "2                                                      NaN   SA17A    749073  \n",
        "3                                                      NaN   SA17A    749073  \n",
        "4                                                      NaN   SA17A    736166  \n",
        "5                                                      NaN   SA17A    736166  \n",
        "6                                                      NaN   SA17A    736166  \n",
        "...                                                    ...     ...       ...  \n",
        "1001724                                                NaN   SA17A    751678  \n",
        "1001725                                                NaN   SA17A    751678  \n",
        "1001726  REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM...   SA17A    751678  \n",
        "1001727                                                NaN   SA17A    751678  \n",
        "1001728                                                NaN   SA17A    751678  \n",
        "1001729                                                NaN   SA17A    751678  \n",
        "1001730                                                NaN   SA17A    751678  \n",
        "\n",
        "[1001731 rows x 16 columns]"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Single record by index label (.loc replaces the deprecated .ix indexer).\n",
      "fec.loc[123456]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 7,
       "text": [
        "cmte_id            C00431445\n",
        "cand_id            P80003338\n",
        "cand_nm        Obama, Barack\n",
        "contbr_nm        ELLMAN, IRA\n",
        "contbr_city            TEMPE\n",
        "...\n",
        "contb_receipt_dt    01-DEC-11\n",
        "receipt_desc              NaN\n",
        "memo_cd                   NaN\n",
        "memo_text                 NaN\n",
        "form_tp                 SA17A\n",
        "file_num               772372\n",
        "Name: 123456, Length: 16, dtype: object"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Distinct candidate names; only the last expression of a cell is\n",
      "# displayed, so show the third entry directly.\n",
      "unique_cands = fec['cand_nm'].unique()\n",
      "unique_cands[2]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 8,
       "text": [
        "'Obama, Barack'"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "parties = {'Bachmann, Michelle': 'Republican',\n",
      "           'Cain, Herman': 'Republican',\n",
      "           'Gingrich, Newt': 'Republican',\n",
      "           'Huntsman, Jon': 'Republican',\n",
      "           'Johnson, Gary Earl': 'Republican',\n",
      "           'McCotter, Thaddeus G': 'Republican',\n",
      "           'Obama, Barack': 'Democrat',\n",
      "           'Paul, Ron': 'Republican',\n",
      "           'Pawlenty, Timothy': 'Republican',\n",
      "           'Perry, Rick': 'Republican',\n",
      "           \"Roemer, Charles E. 'Buddy' III\": 'Republican',\n",
      "           'Romney, Mitt': 'Republican',\n",
      "           'Santorum, Rick': 'Republican'}"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 9
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "parties = {'Bachmann, Michelle': 'Republican',\n",
      "           'Cain, Herman': 'Republican',\n",
      "           'Gingrich, Newt': 'Republican',\n",
      "           'Huntsman, Jon': 'Republican',\n",
      "           'Johnson, Gary Earl': 'Republican',\n",
      "           'McCotter, Thaddeus G': 'Republican',\n",
      "           'Obama, Barack': 'Democrat',\n",
      "           'Paul, Ron': 'Republican',\n",
      "           'Pawlenty, Timothy': 'Republican',\n",
      "           'Perry, Rick': 'Republican',\n",
      "           \"Roemer, Charles E. 'Buddy' III\": 'Republican',\n",
      "           'Romney, Mitt': 'Republican',\n",
      "           'Santorum, Rick': 'Republican'}"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Translate each candidate name to its party and store the result as a\n",
      "# new column, then tally the rows per party.\n",
      "fec['party'] = fec['cand_nm'].map(parties)\n",
      "fec['party'].value_counts()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 10,
       "text": [
        "Democrat      593746\n",
        "Republican    407985\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Tally rows with a positive contribution amount against the rest.\n",
      "(fec['contb_receipt_amt'] > 0).value_counts()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 11,
       "text": [
        "True     991475\n",
        "False     10256\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Keep only the rows whose contribution amount is positive.\n",
      "fec = fec[fec['contb_receipt_amt'] > 0]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Subset restricted to the two named candidates.\n",
      "fec_mrbo = fec[fec['cand_nm'].isin(['Obama, Barack',\n",
      "                                    'Romney, Mitt'])]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 15
    }
   ],
   "metadata": {}
  }
 ]
}